Q8_Q8_1 is general programming skills
# Load the commit-novelty data set; read.csv() already defaults to
# header = TRUE and sep = ",".
df <- read.csv("input/commits_novelty.csv")
# Keep only the rows that have no missing value in any column.
df <- df[complete.cases(df), ]
df
# Treat the group identifier as a categorical variable.
df$group <- factor(df$group)
# Derive two new columns, log(x + 1) of similarity and of count; adding 1
# maps a raw value of 0 to 0 instead of -Inf.
df[["log_relational_novelty"]] <- log(df[["similarity"]] + 1)
df[["log_count"]] <- log(df[["count"]] + 1)
# The Q* columns are overwritten in place with their log(x + 1) values.
df[["Q7_Q7_1"]] <- log(df[["Q7_Q7_1"]] + 1)
df[["Q7_Q7_2"]] <- log(df[["Q7_Q7_2"]] + 1)
df[["Q8_Q8_1"]] <- log(df[["Q8_Q8_1"]] + 1)
df[["Q10"]] <- log(df[["Q10"]] + 1)
df
# Standardise (centre to mean 0, scale to unit standard deviation) the Q*
# skill/aspiration columns together with both log-transformed measures.
cols <- c(
  "Q7_Q7_1", "Q7_Q7_2", "Q8_Q8_1", "Q10",
  "log_relational_novelty", "log_count"
)
df[cols] <- scale(df[cols])
df
# Regress log_count on group membership alone.
mod <- lm(
  log_count ~ factor(group),
  data = df
)
summary(mod)
Call:
lm(formula = log_count ~ factor(group), data = df)
Residuals:
Min 1Q Median 3Q Max
-1.1938 -0.9742 -0.1165 0.5462 3.4873
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.07037 0.08193 -0.859 0.3907
factor(group)1 0.21956 0.11368 1.931 0.0539 .
factor(group)2 -0.06328 0.11773 -0.537 0.5911
factor(group)3 0.10161 0.11301 0.899 0.3689
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9968 on 607 degrees of freedom
Multiple R-squared: 0.01137, Adjusted R-squared: 0.006485
F-statistic: 2.327 on 3 and 607 DF, p-value: 0.07358
# Regress log relational novelty on the four Q* predictors only.
mod <- lm(
  log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.1132 -0.4060 0.3574 0.7447 1.3212
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.765e-16 4.012e-02 0.000 1.0000
Q10 1.517e-02 4.341e-02 0.349 0.7269
Q8_Q8_1 8.695e-02 4.473e-02 1.944 0.0524 .
Q7_Q7_1 -6.090e-02 5.007e-02 -1.216 0.2243
Q7_Q7_2 1.201e-01 5.134e-02 2.340 0.0196 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9918 on 606 degrees of freedom
Multiple R-squared: 0.02273, Adjusted R-squared: 0.01628
F-statistic: 3.524 on 4 and 606 DF, p-value: 0.007427
# Regress log relational novelty on log_count alone.
mod <- lm(
  log_relational_novelty ~ log_count,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ log_count, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2833 -0.4112 0.2627 0.6549 1.4603
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.026e-16 3.743e-02 0.00 1
log_count 3.816e-01 3.746e-02 10.19 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9251 on 609 degrees of freedom
Multiple R-squared: 0.1456, Adjusted R-squared: 0.1442
F-statistic: 103.8 on 1 and 609 DF, p-value: < 2.2e-16
# Full additive model: group plus log_count and all four Q* predictors.
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
    Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2898 -0.4308 0.2405 0.6320 1.4916
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17946 0.07540 -2.380 0.0176 *
factor(group)1 0.13553 0.10525 1.288 0.1983
factor(group)2 0.25372 0.10878 2.332 0.0200 *
factor(group)3 0.32132 0.10400 3.090 0.0021 **
log_count 0.37362 0.03757 9.945 <2e-16 ***
Q7_Q7_1 -0.03630 0.04638 -0.783 0.4341
Q7_Q7_2 0.09941 0.04769 2.085 0.0375 *
Q8_Q8_1 0.05860 0.04137 1.416 0.1571
Q10 -0.01633 0.04067 -0.401 0.6882
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9145 on 602 degrees of freedom
Multiple R-squared: 0.1746, Adjusted R-squared: 0.1636
F-statistic: 15.92 on 8 and 602 DF, p-value: < 2.2e-16
# Stage nested within group: `factor(group)/stage` expands to
# factor(group) + factor(group):stage, i.e. a separate stage term per group.
mod <- lm(
  log_relational_novelty ~ factor(group)/stage + log_count + Q7_Q7_1 +
    Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group)/stage + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3715 -0.4255 0.2600 0.6158 1.5685
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.40812 0.18427 -2.215 0.0272 *
factor(group)1 0.26337 0.25541 1.031 0.3029
factor(group)2 0.11213 0.28738 0.390 0.6965
factor(group)3 0.43797 0.25391 1.725 0.0851 .
log_count 0.38109 0.03768 10.115 <2e-16 ***
Q7_Q7_1 -0.03506 0.04631 -0.757 0.4493
Q7_Q7_2 0.09983 0.04762 2.097 0.0365 *
Q8_Q8_1 0.05672 0.04131 1.373 0.1702
Q10 -0.01681 0.04060 -0.414 0.6790
factor(group)0:stage 0.09163 0.06735 1.361 0.1742
factor(group)1:stage 0.03993 0.06457 0.618 0.5365
factor(group)2:stage 0.13611 0.07578 1.796 0.0730 .
factor(group)3:stage 0.04475 0.06377 0.702 0.4832
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9131 on 598 degrees of freedom
Multiple R-squared: 0.1827, Adjusted R-squared: 0.1663
F-statistic: 11.14 on 12 and 598 DF, p-value: < 2.2e-16
# Group crossed with stage: `factor(group) * stage` expands to both main
# effects plus their interaction.
mod <- lm(
  log_relational_novelty ~ factor(group) * stage + log_count + Q7_Q7_1 +
    Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) * stage +
log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3715 -0.4255 0.2600 0.6158 1.5685
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.40812 0.18427 -2.215 0.0272 *
factor(group)1 0.26337 0.25541 1.031 0.3029
factor(group)2 0.11213 0.28738 0.390 0.6965
factor(group)3 0.43797 0.25391 1.725 0.0851 .
stage 0.09163 0.06735 1.361 0.1742
log_count 0.38109 0.03768 10.115 <2e-16 ***
Q7_Q7_1 -0.03506 0.04631 -0.757 0.4493
Q7_Q7_2 0.09983 0.04762 2.097 0.0365 *
Q8_Q8_1 0.05672 0.04131 1.373 0.1702
Q10 -0.01681 0.04060 -0.414 0.6790
factor(group)1:stage -0.05169 0.09326 -0.554 0.5796
factor(group)2:stage 0.04449 0.10118 0.440 0.6603
factor(group)3:stage -0.04688 0.09276 -0.505 0.6135
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9131 on 598 degrees of freedom
Multiple R-squared: 0.1827, Adjusted R-squared: 0.1663
F-statistic: 11.14 on 12 and 598 DF, p-value: < 2.2e-16
# Model proposed by stepwise regression: group, log_count and Q7_Q7_2.
library(stats)
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2247 -0.4206 0.2416 0.6378 1.5600
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.18350 0.07530 -2.437 0.01510 *
factor(group)1 0.13586 0.10467 1.298 0.19480
factor(group)2 0.26234 0.10837 2.421 0.01578 *
factor(group)3 0.32876 0.10377 3.168 0.00161 **
log_count 0.37742 0.03726 10.129 < 2e-16 ***
Q7_Q7_2 0.09265 0.03718 2.492 0.01296 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9141 on 605 degrees of freedom
Multiple R-squared: 0.1712, Adjusted R-squared: 0.1644
F-statistic: 25 on 5 and 605 DF, p-value: < 2.2e-16
# Akaike information criterion of the current `mod` (lower is better).
AIC(mod)
[1] 1632.175
# Bayesian information criterion of the current `mod` (lower is better).
BIC(mod)
[1] 1663.081
# Same predictors as the full additive model but without factor(group):
# log_count plus all four Q* variables.
library(stats)
mod <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3412 -0.3826 0.2582 0.6443 1.4960
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.660e-16 3.724e-02 0.000 1.0000
log_count 3.739e-01 3.764e-02 9.932 <2e-16 ***
Q7_Q7_1 -3.929e-02 4.651e-02 -0.845 0.3986
Q7_Q7_2 1.070e-01 4.767e-02 2.244 0.0252 *
Q8_Q8_1 6.659e-02 4.156e-02 1.602 0.1096
Q10 -2.472e-02 4.049e-02 -0.611 0.5418
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9204 on 605 degrees of freedom
Multiple R-squared: 0.1597, Adjusted R-squared: 0.1528
F-statistic: 23 on 5 and 605 DF, p-value: < 2.2e-16
# Akaike information criterion of the current `mod` (lower is better).
AIC(mod)
[1] 1640.598
# Bayesian information criterion of the current `mod` (lower is better).
BIC(mod)
[1] 1671.504
# mod.1: reduced model with log_count and Q7_Q7_2 only (no group term).
library(stats)
mod.1 <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_2,
  data = df
)
summary(mod.1)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2719 -0.3984 0.2622 0.6406 1.4734
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.208e-16 3.724e-02 0.000 1.00000
log_count 3.772e-01 3.731e-02 10.111 < 2e-16 ***
Q7_Q7_2 9.928e-02 3.731e-02 2.661 0.00799 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9205 on 608 degrees of freedom
Multiple R-squared: 0.1554, Adjusted R-squared: 0.1527
F-statistic: 55.95 on 2 and 608 DF, p-value: < 2.2e-16
# Akaike information criterion of mod.1 (lower is better).
AIC(mod.1)
[1] 1637.722
# Bayesian information criterion of mod.1 (lower is better).
BIC(mod.1)
[1] 1655.382
# mod.2: the mod.1 predictors (log_count, Q7_Q7_2) plus factor(group).
library(stats)
mod.2 <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod.2)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2247 -0.4206 0.2416 0.6378 1.5600
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.18350 0.07530 -2.437 0.01510 *
factor(group)1 0.13586 0.10467 1.298 0.19480
factor(group)2 0.26234 0.10837 2.421 0.01578 *
factor(group)3 0.32876 0.10377 3.168 0.00161 **
log_count 0.37742 0.03726 10.129 < 2e-16 ***
Q7_Q7_2 0.09265 0.03718 2.492 0.01296 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9141 on 605 degrees of freedom
Multiple R-squared: 0.1712, Adjusted R-squared: 0.1644
F-statistic: 25 on 5 and 605 DF, p-value: < 2.2e-16
# Akaike information criterion of mod.2 (lower is better).
AIC(mod.2)
[1] 1632.175
# Bayesian information criterion of mod.2 (lower is better).
BIC(mod.2)
[1] 1663.081
# Nested-model F test comparing mod.1 (without group) against mod.2 (with
# factor(group)); the author's note: the two models differ significantly.
anova(mod.1, mod.2)
Analysis of Variance Table
Model 1: log_relational_novelty ~ log_count + Q7_Q7_2
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2
Res.Df RSS Df Sum of Sq F Pr(>F)
1 608 515.18
2 605 505.54 3 9.6445 3.8473 0.009558 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
Loading required package: leaps
Loading required package: SuppDists
Loading required package: car
Loading required package: carData
# Stepwise model selection by AIC, starting from the full additive model.
# step() chooses its search mode through `direction`; it has no `method`
# argument, so the previous `method = "both"` was swallowed by `...` and,
# because no `scope` is supplied, the search fell back to backward
# elimination only. Passing `direction = "both"` lets previously dropped
# terms be reconsidered at each step, as originally intended.
step(
  lm(
    log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
      Q8_Q8_1 + Q10,
    data = df
  ),
  direction = "both",
  trace = 1
)
Start: AIC=-100.24
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q10 1 0.135 503.63 -102.081
- Q7_Q7_1 1 0.512 504.01 -101.623
<none> 503.49 -100.244
- Q8_Q8_1 1 1.678 505.17 -100.211
- Q7_Q7_2 1 3.634 507.13 -97.850
- factor(group) 3 9.062 512.56 -95.345
- log_count 1 82.715 586.21 -9.308
Step: AIC=-102.08
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1
Df Sum of Sq RSS AIC
- Q7_Q7_1 1 0.517 504.15 -103.454
- Q8_Q8_1 1 1.544 505.17 -102.211
<none> 503.63 -102.081
- Q7_Q7_2 1 3.522 507.15 -99.823
- factor(group) 3 9.243 512.87 -96.968
- log_count 1 82.749 586.38 -11.132
Step: AIC=-103.45
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2 +
Q8_Q8_1
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 1.393 505.54 -103.768
<none> 504.15 -103.454
- Q7_Q7_2 1 3.164 507.31 -101.631
- factor(group) 3 9.349 513.49 -98.227
- log_count 1 83.685 587.83 -11.620
Step: AIC=-103.77
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2
Df Sum of Sq RSS AIC
<none> 505.54 -103.768
- Q7_Q7_2 1 5.190 510.73 -99.527
- factor(group) 3 9.645 515.18 -98.221
- log_count 1 85.723 591.26 -10.064
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_2, data = df)
Coefficients:
(Intercept) factor(group)1 factor(group)2 factor(group)3 log_count Q7_Q7_2
-0.18350 0.13586 0.26234 0.32876 0.37742 0.09265
# Group plus the four Q* predictors, leaving log_count out.
mod <- lm(
  log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)
Call:
lm(formula = log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 +
Q7_Q7_1 + Q7_Q7_2, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.1926 -0.4578 0.3261 0.7213 1.4289
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.20107 0.08126 -2.474 0.01362 *
factor(group)1 0.20523 0.11322 1.813 0.07038 .
factor(group)2 0.22874 0.11725 1.951 0.05154 .
factor(group)3 0.35502 0.11207 3.168 0.00161 **
Q10 0.01822 0.04369 0.417 0.67679
Q8_Q8_1 0.07857 0.04455 1.764 0.07827 .
Q7_Q7_1 -0.06112 0.04993 -1.224 0.22137
Q7_Q7_2 0.11639 0.05138 2.265 0.02386 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.986 on 603 degrees of freedom
Multiple R-squared: 0.039, Adjusted R-squared: 0.02785
F-statistic: 3.496 on 7 and 603 DF, p-value: 0.001099
Nest Phase in Group [ Linear Mixed Model ]
# Convert the user, stage and group columns to nominal factors in one pass.
df[c("user2", "stage", "group")] <- lapply(
  df[c("user2", "stage", "group")],
  factor
)
# Explore log_relational_novelty within every group-by-stage cell.
library(plyr)
# summary() statistics of the response per cell.
ddply(
  df,
  ~ group * stage,
  function(cell) summary(cell$log_relational_novelty)
)
# Mean and standard deviation of the response per cell.
ddply(
  df,
  ~ group * stage,
  summarise,
  log_relational_novelty.mean = mean(log_relational_novelty),
  log_relational_novelty.sd = sd(log_relational_novelty)
)
# Histograms of log_relational_novelty for every group-by-stage cell.
# The cells are enumerated from the data rather than hard-coded as groups
# 0-3 and stages 1-4, so the sixteen copy-pasted calls collapse into one
# loop (group-major, stage-minor, the same order as before). Each plot is
# labelled with its cell, and cells without observations are skipped instead
# of being handed to hist() with no data.
for (g in as.character(sort(unique(df$group)))) {
  for (s in as.character(sort(unique(df$stage)))) {
    cell_values <- df$log_relational_novelty[df$group == g & df$stage == s]
    if (length(cell_values) == 0L) {
      next
    }
    hist(
      cell_values,
      main = sprintf("log_relational_novelty: group %s, stage %s", g, s),
      xlab = "log_relational_novelty"
    )
  }
}

# Box plots of the response, one box per group-by-stage combination.
boxplot(
  log_relational_novelty ~ group * stage,
  data = df,
  xlab = "Group.Stage",
  ylab = "log_relational_novelty"
)

# Interaction plot of the mean log_relational_novelty per group, one trace
# per stage. log_relational_novelty was standardised earlier (it is one of
# the columns passed to scale()), so cell means lie on both sides of zero;
# the previous ylim = c(0, max(...)) cut every negative mean off the plot.
# The default y-axis limits (the range of the cell means) are used instead.
with(df, interaction.plot(group, stage, log_relational_novelty))

# library for LMM we will use on relational novelty
library(lme4)
library(lmerTest)
library(car)
Variability in relational novelty is much higher between individual users (variance 0.789) than between stages/phases (variance 0.003). The remaining (residual) variability of 0.203477 comes from factors other than individual users and stage. factor(group)1 has higher relational novelty than group 0 by about 0.2099, and factor(group)3 has higher relational novelty than group 0 by about 0.3765; neither difference is statistically significant at the 0.05 level (p = 0.320 and p = 0.072, respectively).
# Mixed model with group as the fixed effect and random intercepts for
# user (user2) and for stage; used to compare the variance components.
var.model <- lmer(
  log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage),
  data = df
)
summary(var.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage)
Data: df
REML criterion at convergence: 1203.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.2978 -0.2484 0.0522 0.2410 3.5006
Random effects:
Groups Name Variance Std.Dev.
user2 (Intercept) 0.789432 0.88850
stage (Intercept) 0.002792 0.05284
Residual 0.203477 0.45108
Number of obs: 611, groups: user2, 157; stage, 4
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.2208 0.1530 146.2072 -1.443 0.151
factor(group)1 0.2099 0.2103 152.3350 0.998 0.320
factor(group)2 0.2659 0.2097 154.1335 1.268 0.207
factor(group)3 0.3765 0.2079 152.4574 1.812 0.072 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) fct()1 fct()2
factr(grp)1 -0.706
factr(grp)2 -0.708 0.515
factr(grp)3 -0.714 0.519 0.521
# Reduced linear model: log_count and the four Q* predictors, no group term.
reduced.model <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(reduced.model)
Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.3412 -0.3826 0.2582 0.6443 1.4960
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.660e-16 3.724e-02 0.000 1.0000
log_count 3.739e-01 3.764e-02 9.932 <2e-16 ***
Q7_Q7_1 -3.929e-02 4.651e-02 -0.845 0.3986
Q7_Q7_2 1.070e-01 4.767e-02 2.244 0.0252 *
Q8_Q8_1 6.659e-02 4.156e-02 1.602 0.1096
Q10 -2.472e-02 4.049e-02 -0.611 0.5418
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9204 on 605 degrees of freedom
Multiple R-squared: 0.1597, Adjusted R-squared: 0.1528
F-statistic: 23 on 5 and 605 DF, p-value: < 2.2e-16
# Full linear model: the reduced-model predictors plus factor(group).
full.model <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
    Q8_Q8_1 + Q10,
  data = df
)
summary(full.model)
Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.2898 -0.4308 0.2405 0.6320 1.4916
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.17946 0.07540 -2.380 0.0176 *
factor(group)1 0.13553 0.10525 1.288 0.1983
factor(group)2 0.25372 0.10878 2.332 0.0200 *
factor(group)3 0.32132 0.10400 3.090 0.0021 **
log_count 0.37362 0.03757 9.945 <2e-16 ***
Q7_Q7_1 -0.03630 0.04638 -0.783 0.4341
Q7_Q7_2 0.09941 0.04769 2.085 0.0375 *
Q8_Q8_1 0.05860 0.04137 1.416 0.1571
Q10 -0.01633 0.04067 -0.401 0.6882
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9145 on 602 degrees of freedom
Multiple R-squared: 0.1746, Adjusted R-squared: 0.1636
F-statistic: 15.92 on 8 and 602 DF, p-value: < 2.2e-16
# Nested-model F test: does adding factor(group) to reduced.model (giving
# full.model) significantly reduce the residual sum of squares?
anova(reduced.model, full.model)
Analysis of Variance Table
Model 1: log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 605 512.56
2 602 503.49 3 9.0623 3.6117 0.01317 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Box plots per stage-by-group combination; the four colours are recycled
# across the boxes.
boxplot(
  log_relational_novelty ~ stage * group,
  data = df,
  col = c("white", "lightgray", "blue", "green")
)

# Collinearity diagnostics (VIF and tolerance) for the terms of full.model.
# NOTE(review): check_collinearity() comes from the 'performance' package and
# no library(performance) call is visible before this line - confirm it is
# attached.
check_collinearity(full.model)
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
factor(group) 1.05 [1.01, 1.27] 1.03 0.95 [0.79, 0.99]
log_count 1.03 [1.00, 1.48] 1.01 0.97 [0.67, 1.00]
Q7_Q7_1 1.57 [1.42, 1.76] 1.25 0.64 [0.57, 0.70]
Q7_Q7_2 1.66 [1.50, 1.87] 1.29 0.60 [0.54, 0.67]
Q8_Q8_1 1.25 [1.16, 1.40] 1.12 0.80 [0.72, 0.87]
Q10 1.21 [1.12, 1.35] 1.10 0.83 [0.74, 0.89]
# Variance inflation factors for full.model via car::vif(); because
# factor(group) has several degrees of freedom, generalised VIFs are reported.
library(car)
vif(full.model)
GVIF Df GVIF^(1/(2*Df))
factor(group) 1.053501 3 1.008724
log_count 1.029442 1 1.014614
Q7_Q7_1 1.568771 1 1.252506
Q7_Q7_2 1.658769 1 1.287932
Q8_Q8_1 1.248089 1 1.117179
Q10 1.206293 1 1.098314
# Variance inflation factors for the predictors of reduced.model.
vif(reduced.model)
log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1 Q10
1.020281 1.557734 1.635974 1.243664 1.180182
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(lsmeans)
Loading required package: emmeans
The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
#summary(glht(full.model, lsm(pairwise ~ group / stage)), test = adjusted(type='holm'))
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBRMTAgaXMgYXNwaXJhdGlvbnMgCiMgUTdfN18xIGlzIGphdmFzY3JpcHQgc2tpbGxzIAojIFE3XzdfMiBpcyBtYXNodXAgc2tpbGxzIAojIFE4XzhfMSBpcyBnZW5lcmFsIHByb2dyYW1taW5nIHNraWxscyAKCmBgYHtyfQojIHJlYWRpbmcgaW5wdXQgCmRmPC1yZWFkLmNzdigiaW5wdXQvY29tbWl0c19ub3ZlbHR5LmNzdiIsIGhlYWRlciA9VFJVRSwgc2VwPSIsIikKZGYgPC0gZGZbY29tcGxldGUuY2FzZXMoZGYpLCBdICAKZGYKYGBgCgpgYGB7cn0KZGYkZ3JvdXAgPSBmYWN0b3IoZGYkZ3JvdXApCmBgYAoKCmBgYHtyfQojIGNyZWF0ZSBuZXcgY29sdW1ucyBjYWxsZWQgbG9nIHJlbGF0aW9uYWwgbm92ZWx0eQpkZiRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IDwtIGxvZyhkZiRzaW1pbGFyaXR5KzEpIApkZiRsb2dfY291bnQgPC0gbG9nKGRmJGNvdW50KzEpIApkZiRRN19RN18xIDwtIGxvZyhkZiRRN19RN18xKzEpCmRmJFE3X1E3XzIgPC0gbG9nKGRmJFE3X1E3XzIrMSkKZGYkUThfUThfMSA8LSBsb2coZGYkUThfUThfMSsxKQpkZiRRMTAgPC0gbG9nKGRmJFExMCsxKQpkZgpgYGAKCgpgYGB7cn0KIyBzdGFuZGFyZGl6aW5nIHZhcmlhYmxlcyBmb3Igc2tpbGxzIGFuZCBhc3BpcmF0aW9ucy4gCmNvbHMgPC0gYygiUTdfUTdfMSIsICJRN19RN18yIiwgIlE4X1E4XzEiLCAiUTEwIiwgImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiLCAibG9nX2NvdW50IikKZGZbY29sc10gPC0gc2NhbGUoZGZbY29sc10pCmRmCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0obG9nX2NvdW50IH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkvc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICogc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCgoKYGBge3J9CiMgUHJv
cG9zZWQgbW9kZWwgYnkgc3RlcHdpc2UgcmVncmVzc2lvbgpsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgbG9nX2NvdW50ICsgUTdfUTdfMiAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCkFJQyhtb2QpCkJJQyhtb2QpCmBgYAoKYGBge3J9CiMgd2l0aG91dCB0aGUgZmFjdG9yICggZ3JvdXAgKSBhbmQgd2l0aCBhbGwgY29uZm91bmRpbmcgdmFyaWFibGVzIApsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpBSUMobW9kKQpCSUMobW9kKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMSA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kLjEpCkFJQyhtb2QuMSkKQklDKG1vZC4xKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMiA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18yICwgZGF0YSA9IGRmKQpzdW1tYXJ5KG1vZC4yKQpBSUMobW9kLjIpCkJJQyhtb2QuMikKYGBgCgpgYGB7cn0KIyBtb2RlbCB3aXRoIGFuZCB3aXRob3V0IGdyb3VwcyBhcmUgdmVyeSBkaWZmZXJlbnQgKCBzaWduaWZpY2FudCApCmFub3ZhKG1vZC4xLCBtb2QuMikKYGBgCgoKYGBge3J9CmxpYnJhcnkoQUxTTSkKc3RlcChsbShsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YT1kZiksCm1ldGhvZD0iYm90aCIsIHRyYWNlID0gMSApCmBgYAoKCgoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCiMjIE5lc3QgUGhhc2UgaW4gR3JvdXAgWyBMaW5lYXIgTWl4ZWQgTW9kZWwgXQoKYGBge3J9CiMgY29udmVydCB0byBub21pbmFsIGZhY3RvcgpkZiR1c2VyMiA9IGZhY3RvcihkZiR1c2VyMikKZGYkc3RhZ2UgPSBmYWN0b3IoZGYkc3RhZ2UpCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpgYGAKCmBgYHtyfQojIGV4cGxvcmUgdGhlIGRhdGEgYW5kIHRoZWlyIGxldmVscyAKbGlicmFyeShwbHlyKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBmdW5jdGlvbihkYXRhKSBzdW1tYXJ5KGRhdGEkbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkgKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBzdW1tYXJpc2UsIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkubWVhbj1tZWFuKGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpLCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5LnNkID0gc2Qo
bG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkpCmBgYApgYGB7cn0KIyBoaXN0b2dyYW1zIGZvciB0d28gZmFjdG9ycwpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAwICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMCAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gMSxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMSAmIGRmJHN0YWdlID09IDIsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDEgJiBkZiRzdGFnZSA9PSAzLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gNCxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDEsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDIgJiBkZiRzdGFnZSA9PSAyLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAyICYgZGYkc3RhZ2UgPT0gMyxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDQsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAzICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMyAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKYm94cGxvdChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZ3JvdXAgKiBzdGFnZSwgZGF0YSA9IGRmLCB4bGFiPSJHcm91cC5TdGFnZSIsIHlsYWI9ImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiKQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBzdGFnZSwgbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSwgeWxpbT1jKDAsIG1heChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KSkpKSAjIGludGVyYWN0aW9uIHBsb3QKYGBgCmBgYHtyfQojIGxpYnJhcnkgZm9yIExNTSB3ZSB3aWxsIHVzZSBvbiByZWxhdGlvbmFsIG5vdmVsdHkgCgpsaWJyYXJ5KGxtZTQpCmxpYnJhcnkobG1lclRlc3QpCmxpYnJhcnkoY2FyKQpgYGAKCiMgc2V0IHN1bS10by16ZXJvIGNvbnRyYXN0cyBmb3IgdGhlIEFub3ZhIGNlbGxzIAoKYGBge3J9CmNvbnRy
YXN0cyhkZiRncm91cCkgPD0gImNvbnRyLnN1bSIKY29udHJhc3RzKGRmJHN0YWdlKSA8PSAiY29udHIuc3VtIgpgYGAKCgpgYGB7cn0KIyBzdGFnZSBuZXN0ZWQgd2l0aGluIGdyb3VwIApmdWxsLm1vZGVsID0gbG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGdyb3VwL3N0YWdlICsgKDEgfCB1c2VyMiApLCBkYXRhID0gZGYsIFJFTUwgPSBGQUxTRSkKQW5vdmEoZnVsbC5tb2RlbCwgdHlwZT0zLCB0ZXN0LnN0YXRpc3RpY3M9IkYiKQpmdWxsLm1vZGVsCmBgYApgYGB7cn0KbGlicmFyeShwZXJmb3JtYW5jZSkKCmNoZWNrX2NvbGxpbmVhcml0eShmdWxsLm1vZGVsKQoKCmBgYAojIHZhcmlhYmlsaXR5IGlzIHZlcnkgbXVjaCBoaWdoZXIgaW4gaW5kaXZpZHVhbCB1c2VyIGFuZCB0aGFuIGluIHN0YWdlcy9waGFzZXMgZm9yIHJlbGF0aW9uYWwgbm92ZWx0eS4gVGhlIHJlbWFpbmluZyB2YXJpYWJpbGl0eSBvZiAgMC4yMDM1MDIgY29tZXMgZnJvbSBmYWN0b3Igb3RoZXIgdGhhbiBpbmRpdmlkdWFsIHVzZXJzIGFuZCBzdGFnZS4gZmFjdG9yKGdyb3VwKTEgIGhhcyBoaWdoZXIgcmVsYXRpb25hbCBub3ZlbHR5IHRoYW4gZ3JvdXAgMCBieSBhYm91dCAgMC4yMDk5IC4gZmFjdG9yKGdyb3VwKTMgaGFzIGhpZ2hlciByZWxhdGlvbmFsIG5vdmVsdHkgdGhhbiBncm91cCAwIGJ5IGFib3V0IDAuMzc2NS4gCgpgYGB7cn0KdmFyLm1vZGVsID0gbG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyAoIDEgfCB1c2VyMikgKyAoIDEgfCBzdGFnZSksIGRhdGEgPSBkZikKc3VtbWFyeSh2YXIubW9kZWwpCmBgYAoKCmBgYHtyfQpyZWR1Y2VkLm1vZGVsID0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAsIGRhdGEgPSBkZikKc3VtbWFyeShyZWR1Y2VkLm1vZGVsKQpgYGAKCgpgYGB7cn0KZnVsbC5tb2RlbCA9IGxtKCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YSA9IGRmKQpzdW1tYXJ5KGZ1bGwubW9kZWwpCmBgYAoKYGBge3J9CmFub3ZhKHJlZHVjZWQubW9kZWwsIGZ1bGwubW9kZWwpCmBgYAoKCmBgYHtyfQpib3hwbG90KGxvZ19yZWxhdGlvbmFsX25vdmVsdHl+IHN0YWdlKmdyb3VwLApjb2w9Yygid2hpdGUiLCJsaWdodGdyYXkiLCAiYmx1ZSIsICJncmVlbiIpLGRmKQpgYGAKCmBgYHtyfQpjaGVja19jb2xsaW5lYXJpdHkoZnVsbC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShjYXIpCgp2aWYoZnVsbC5tb2RlbCkKYGBgCmBgYHtyfQp2aWYocmVkdWNlZC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShtdWx0Y29tcCkKbGlicmFyeShsc21lYW5zKQojc3VtbWFyeShnbGh0KGZ1bGwubW9kZWwsIGxzbShwYWlyd2lzZSB+IGdyb3VwIC8gc3RhZ2UpKSwgdGVzdCA9IGFkanVzdGVkKHR5cGU9J2hvbG0nKSkK
YGBgCgo=